# Create the following variables
# "SCI_Flooded_Distant"
# "SCI_MichaelArea"
# "SCI_HighIncome"
# "SCI_LowIncome"

# Setup -----
library(readxl)
library(tidyverse) 
library(tidycensus)





##############################################################################################
# Download & load FEMA's disaster declarations summaries dataset
# SOURCE: https://www.fema.gov/about/openfema/data-sets, "OpenFEMA Dataset: Disaster Declarations Summaries"
# Download and load data as "DisasterDeclarationsSummaries"


DisasterDeclarationsSummaries = DisasterDeclarationsSummaries %>%
  mutate(
    declarationDate = as.Date(declarationDate),                                                                    # convert to date format
    fipsStateCountyCode = paste0(ifelse(nchar(fipsStateCode)==1, paste0("0", fipsStateCode), fipsStateCode),       # create FIPS state county 5-digit code
                                 
                                 ifelse(nchar(fipsCountyCode)==1, paste0("00", fipsCountyCode),
                                 ifelse(nchar(fipsCountyCode)==2, paste0("0", fipsCountyCode), fipsCountyCode)))
  )




# List of FL counties that declared individual assistance for Hurricane Michael ---------------

Michael_IA_list =  DisasterDeclarationsSummaries %>%    
  filter(ihProgramDeclared==1 | iaProgramDeclared==1) %>%
  filter(declarationTitle=="HURRICANE MICHAEL") %>%
  filter(fipsStateCode==12)


   


# List of counties that had flood-related disaster declarations from 2016-2017 -----------------
DisasterDeclarationsSummaries_flood_1617 = DisasterDeclarationsSummaries %>% 
  filter(incidentType=="Severe Storm" |
         incidentType=="Hurricane" |
         incidentType=="Coastal Storm" |
         incidentType=="Flood" |
         incidentType=="Typhoon") %>%
  filter(declarationDate>=as.Date("2016-01-01") &
         declarationDate<as.Date("2018-01-01")) 






##############################################################################################
# Load Tract-to-ZIP and ZIP-to-County crosswalk files from HUD (SOURCE: https://www.huduser.gov/portal/datasets/usps_crosswalk.html)
# Load file that indicates list of counties within 750 miles of Michael-affected areas (Extracted with ArcGIS Pro using 2017 U.S. county boundary shapefile from Census Bureau)

TRACT_ZIP_122017        <- read_excel("/02_Data/TRACT_ZIP_122017.xlsx")         # HUD's Tract-to-ZIP crosswalk file
ZIP_COUNTY_122017       <- read_excel("/02_Data/ZIP_COUNTY_122017.xlsx")        # HUD's ZIP-to-County crosswalk file
Michael_county_750miles <- read_excel("/02_Data/Michael_county_750miles.xlsx")  # List of counties within 750 miles of Michael-affected areas


us <- unique(fips_codes$state_code)[1:51]  # list of U.S. state (fips code)
TRACT_ZIP_122017$zip  = as.numeric(TRACT_ZIP_122017$zip)
ZIP_COUNTY_122017$zip = as.numeric(ZIP_COUNTY_122017$zip)





# List of ZIPs that had flood-related disaster declarations from 2016-2017 ------------------

ZIP_flood_1617 = ZIP_COUNTY_122017 %>%                     
  mutate(fips_state = substr(county, 1, 2)) %>%
  filter(fips_state %in% us) %>%
  filter(county %in% DisasterDeclarationsSummaries_flood_1617$fipsStateCountyCode) %>%
  filter(res_ratio>0)




# List of ZIPs that are 750 miles away from Michael-affected areas ---------------------------
ZIP_750MilesAway_Michael = ZIP_COUNTY_122017 %>%            
  mutate(fips_state = substr(county, 1, 2)) %>%
  filter(fips_state %in% us) %>%
  filter(!(county %in% Michael_IA_list$fipsStateCountyCode)) %>%
  filter(!(county %in% Michael_county_750miles$GEOID))







##############################################################################################
# 2017 zip-level population & median income from ACS 5-year 

zip0 <- get_acs(geography = "zcta", 
                variables = c(population ="B01003_001",
                              medincome = "B19013_001"), 
                year=2017,
                survey = "acs5")

zip0$moe <- NULL
zip2017 = zip0 %>% distinct(GEOID, NAME, .keep_all = F)
for (i in 1:length(unique(zip0$variable))) {
  test_i = zip0[zip0$variable==unique(zip0$variable)[i], "estimate"]
  colnames(test_i)[which(names(test_i) == "estimate")] <- paste0(unique(zip0$variable)[i], "2017")
  zip2017 = cbind.data.frame(zip2017, test_i)
}

zip2017$GEOID = as.integer(zip2017$GEOID)


# 2017 state-level median income from ACS 5-year 
state0 <- get_acs(geography = "state", 
                  variables = c(
                    state_medincome2017 = "B19013_001"), 
                  year=2017,
                  survey = "acs5")

state0 = state0 %>% select(GEOID_state = GEOID, state_medincome2017 = estimate)




# ZIP-State crosswalk, join in state-level median income info
ZIP_STATE = ZIP_COUNTY_122017 %>% 
  mutate(fips_state = substr(county, 1, 2)) %>%
  group_by(zip, fips_state) %>% 
  summarise(res_ratio = sum(res_ratio)) %>%
  filter(res_ratio==1) %>%
  left_join(state0, by=c("fips_state"="GEOID_state"))








##############################################################################################
# Download & load Facebook Social Connectedness Index data
# SOURCE: https://data.humdata.org/dataset/social-connectedness-index, "US zip code-US zip code"
# Download and load data as "SCI"

# SCI <- read.delim("/us-zip-code-us-zip-code-fb-social-connectedness-index-october-2021/zcta_zcta_shard3.tsv")


# create ZIP-level SCI Variables -------------------------------------------------------------
SCI_v2 = SCI %>% 
  filter(user_loc %in% unique(DIA_Michael_TRACT_ZIP$zip)) %>%                        # First zip -- only keep ZIP codes affected by Hurricane Michael
  left_join(zip2017, by=c("fr_loc"="GEOID")) %>%                                     # Second zip (friend zip) -- join in zip-level population & median income data
  filter(!is.na(population2017)) %>%
  left_join(ZIP_STATE[, c("zip", "fips_state", "state_medincome2017")], 
            by=c("fr_loc"="zip")) %>%                                                # Second zip (friend zip) -- join in state median income
  mutate(
    fr_loc_distant    = ifelse(fr_loc %in% ZIP_750MilesAway_Michael$zip, 1, 0),      # Second zip (friend zip) -- 750 miles away from Michael-affected areas
    fr_loc_flood_1617 = ifelse(fr_loc %in% ZIP_flood_1617$zip, 1, 0),                # Second zip (friend zip) -- had flood-related disaster declarations from 2016-2017
    
    fr_loc_DIA_Michael        = ifelse(fr_loc %in% DIA_Michael_TRACT_ZIP$zip, 1, 0), # Second zip (friend zip) -- Michael-affected areas
    fr_loc_medincome2017_High = ifelse(medincome2017 > state_medincome2017, 1, 0),   # Second zip (friend zip) -- High-income areas
    fr_loc_medincome2017_Low  = ifelse(medincome2017 <= state_medincome2017, 1, 0)   # Second zip (friend zip) -- Low-income areas
  ) %>%
  group_by(user_loc) %>%
  summarise(
    SCI_flood_1617_zip_IV_w     = sum(scaled_sci*population2017*fr_loc_distant*fr_loc_flood_1617)/sum(population2017),      # ZIP-level SCI_Flooded_Distant
    SCI_FL_DIA_Michael_zip_w    = sum(scaled_sci*population2017*fr_loc_DIA_Michael)/sum(population2017),                    # ZIP-level SCI_MichaelArea
    SCI_HighIncome_Nation_zip_w = sum(scaled_sci*population2017*fr_loc_medincome2017_High, na.rm = T)/sum(population2017),  # ZIP-level SCI_HighIncome
    SCI_LowIncome_Nation_zip_w  = sum(scaled_sci*population2017*fr_loc_medincome2017_Low, na.rm = T)/sum(population2017)    # ZIP-level SCI_LowIncome
  )







# Convert zip-level SCI Variables into tract-level SCI Variables ------------------------------


DIA_Michael_TRACT_ZIP = TRACT_ZIP_122017 %>%                         # Tract-to-ZIP crosswalk for Michael-affected areas
  mutate(fips_county = substr(tract, 1, 5)) %>%
  filter(fips_county %in% Michael_IA_list$fipsStateCountyCode) %>%
  filter(res_ratio>0)


SCI_v3 = DIA_Michael_TRACT_ZIP %>%
  mutate(zip = as.numeric(zip)) %>%
  left_join(SCI_v2, by=c("zip"="user_loc")) %>%
  filter(!is.na(SCI_flood_1617_zip_IV_w)) %>%
  group_by(tract) %>%
  summarise(
    SCI_Flooded_Distant = sum(SCI_flood_1617_zip_IV_w*res_ratio)/sum(res_ratio),      # tract-level SCI_Flooded_Distant
    SCI_MichaelArea     = sum(SCI_FL_DIA_Michael_zip_w*res_ratio)/sum(res_ratio),     # tract-level SCI_MichaelArea
    SCI_HighIncome      = sum(SCI_HighIncome_Nation_zip_w*res_ratio)/sum(res_ratio),  # tract-level SCI_HighIncome
    SCI_LowIncome       = sum(SCI_LowIncome_Nation_zip_w*res_ratio)/sum(res_ratio)    # tract-level SCI_LowIncome
  ) %>%
  mutate(
    tract = as.numeric(tract)
  )



save(SCI_v3 , file="/02_Data/SCI_v3.RData")


